With untargeted LC-MS, metabolites were detected under both negative and positive polarities. To prevent double counting metabolites in my analysis, I took the mean of each polarities across all samples and selected the metabolite that had the strongest ion intensity under that polarity.
#Read in required libraries
library("reshape")
#library(plyr)
library("dplyr")
library("tidyverse")
library("ggplot2")
library("arsenal")
library("Rmisc")
library(gridExtra)
library(ggpubr)
library(factoextra)
library(ropls)
library(mixOmics)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(lme4)
library(lmerTest)
library(car)
library(effects)
library(ggfortify)
library(cowplot)
library(vegan)
library(corrr)
library(ggcorrplot)
library(GGally)
library(broom)
library(cowplot)
library(RVAideMemoire)
library(arsenal)
library(patchwork)
library(tidyr)
#Import data
sample.info <- read.csv("../data/Metabolomics/Porites-Kevin_SampleInfo.csv")
peak.pos <- read.csv("../data/Metabolomics/Peaks_Pos.csv")
peak.neg <- read.csv("../data/Metabolomics/Peaks_Neg.csv")
#Removing unncessesary columns
peak.pos2 <- peak.pos[ -c(1:9)]
peak.pos3 <- peak.pos2[ -c(2:9)] # if we need the blanks, change the 9 to 6
peak.neg2 <- peak.neg[ -c(1:9)]
peak.neg3 <- peak.neg2[ -c(2:9)] # if we need the blanks, change the 9 to 6
#### Selecting polarity based on higher signal intensity ###
#obtain row means
pos.mean <- data.frame(ID=peak.pos3[,1], Means=rowMeans(peak.pos3[,-1]))
neg.mean <- data.frame(ID=peak.neg3[,1], Means=rowMeans(peak.neg3[,-1]))
# Comparing polarity means of each metabolite, x = pos, y = neg
cmp <- comparedf(pos.mean, neg.mean, by = "ID",
tol.factor = "labels") # match only factor labels
n.diffs(cmp) #58 shared metabolites
## [1] 58
list.diffs <- as.data.frame(do.call(cbind, diffs(cmp))) #creating a dataframe with shared metabolites as a list
df.diffs <- data.frame(matrix(unlist(list.diffs), nrow=n.diffs(cmp), byrow=F),stringsAsFactors=FALSE) #converting list to dataframe
names(df.diffs)[1:7] <- c("var.x", "var.y", "ID", "values.x", "values.y", "row.x", "row.y") #changing column names
i <- c(4, 5) #specifying values column
df.diffs[ , i] <- apply(df.diffs[ , i], 2, # changing values column to numeric
function(x) as.numeric(as.character(x)))
df.diffs$selected.pol <- ifelse(df.diffs$values.x > df.diffs$values.y, 'x',
ifelse(df.diffs$values.x < df.diffs$values.y, 'y', 'tie')) #selecting polarity with highest mean
x.diff.keep <- df.diffs %>%
filter(selected.pol == "x") #extracting x selection
y.diff.keep <- df.diffs %>%
filter(selected.pol == "y") #extracting y selection
x.all.keep <- peak.pos3[!(peak.pos3$compound %in% y.diff.keep$ID),] #removing rows where the metabolite is higher in neg df
y.all.keep <- peak.neg3[!(peak.neg3$compound %in% x.diff.keep$ID),] #removing rows where the metabolite is higher in pos df
# Re-shaping dataset
peak.pos4<- melt(x.all.keep, id= "compound") #melting dataset
peak.neg4<- melt(y.all.keep, id= "compound") #melting dataset
# adding polarity
peak.pos4$polarity <- "positive"
peak.neg4$polarity <- "negative"
# Binding positive and negative datasets together
peak.all <- rbind(peak.pos4, peak.neg4)
# Renaming column
names(peak.all)[2] <- "Sample.ID"
names(peak.all)[3] <- "Raw.IonCount"
# Merging weight sample info
peak.all2 <- merge(peak.all, sample.info, by = "Sample.ID")
#Normalization by weight
peak.all2$Raw.IonCount <- as.numeric(as.character(peak.all2$Raw.IonCount))
peak.all2$Norm.IonCount <- peak.all2$Raw.IonCount / peak.all2$Weight.mg
#Selecting columns of interest
peak.all3 <- peak.all2 %>% dplyr::select(Sample.ID, compound, Fragment.ID, Time, Treatment, Norm.IonCount)
#Reformatting dataframe so compounds are listed as column headers
peak.all4 <- peak.all3 %>% spread(compound, Norm.IonCount)
#adding 1000 to all variable to account for 0 values and log normalization
#Log normalization (https://www.intechopen.com/books/metabolomics-fundamentals-and-applications/processing-and-visualization-of-metabolomics-data-using-r)
peak.all5 <- log((peak.all4[5:184] + 1000), 2)
norm.data <- cbind(peak.all4[1:4], peak.all5)
Code inspired by: - http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/118-principal-component-analysis-in-r-prcomp-vs-princomp/ - https://github.com/urol-e5/timeseries/blob/master/time_series_analysis/integration_biological.Rmd line 2465
scaled_pca <-prcomp(norm.data[c(5:184)], scale=TRUE, center=TRUE)
fviz_eig(scaled_pca)
coral_info<-norm.data[c(3,4)]
pca_data<- scaled_pca%>%
augment(coral_info)%>%
group_by(Time, Treatment)%>%
mutate(PC1.mean = mean(.fittedPC1),
PC2.mean = mean(.fittedPC2))
pca.centroids<- pca_data%>%
dplyr::select(Time, Treatment, PC1.mean, PC2.mean)%>%
dplyr::group_by(Time, Treatment)%>%
dplyr::summarise(PC1.mean = mean(PC1.mean),
PC2.mean = mean(PC2.mean))
#Examine PERMANOVA results.
# scale data
vegan <- scale(norm.data[c(5:184)])
# PerMANOVA
permanova<-adonis(vegan ~ Time*Treatment, data = norm.data, method='eu')
z_pca<-permanova$aov.tab
z_pca
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Time 2 875.0 437.52 2.8045 0.11048 0.001 ***
## Treatment 2 696.0 348.02 2.2308 0.08788 0.001 ***
## Time:Treatment 4 732.7 183.17 1.1741 0.09251 0.157
## Residuals 36 5616.3 156.01 0.70912
## Total 44 7920.0 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Assemble plot with background points
#1. make plot with dots
#adding percentages on axis
names(pca_data)[4] <- "PCA1"
names(pca_data)[5] <- "PCA2"
percentage <- round((scaled_pca$sdev^2) / sum((scaled_pca$sdev^2)) * 100, 2)
percentage <- paste( colnames(pca_data[4:50]), "(", paste(as.character(percentage), "%", ")", sep="") )
#setting up data to add polygons
pca_data$Time.Treatment <- paste(pca_data$Time, pca_data$Treatment)
find_hull <- function(pca_data) pca_data[chull(pca_data$PCA1, pca_data$PCA2), ]
hulls <- ddply(pca_data, "Time.Treatment", find_hull)
PCA<-ggplot(pca_data, aes(PCA1, PCA2, color=Treatment)) +
geom_point(size = 4, alpha=0.2, aes(shape = Time))+
scale_colour_manual(values=c("#46008B", "#8B0046", "#468B00")) +
scale_fill_manual(values=c("#46008B", "#8B0046", "#468B00")) +
scale_shape_manual(values=c(15, 17, 19)) +
theme_classic()+
ylim(-10,10)+
xlim(-15,15)+
ylab(percentage[2])+
xlab(percentage[1])+
geom_text(x=9, y=-8.25, label=paste("p(Time)=", z_pca$`Pr(>F)`[1]), size=7, color=ifelse(z_pca$`Pr(>F)`[1] < 0.05, "black", "darkgray")) +
geom_text(x=9, y=-9, label=paste("p(Treatment)=", z_pca$`Pr(>F)`[2]), size=7, color=ifelse(z_pca$`Pr(>F)`[2] < 0.05, "black", "darkgray")) +
geom_text(x=9, y=-9.75, label=paste("p(Time x Treatment)=", z_pca$`Pr(>F)`[3]), size=7, color=ifelse(z_pca$`Pr(>F)`[3] < 0.05, "black", "darkgray")) +
theme(legend.text = element_text(size=17),
legend.position="none",
plot.background = element_blank(),
legend.title = element_text(size=22),
plot.margin = margin(1, 1, 1, 1, "cm"),
axis.text = element_text(size=27),
title = element_text(size=25, face="bold"),
axis.title = element_text(size=30))
#Add centroids
#2. add centroids
PCAcen<-PCA + geom_polygon(data = hulls, alpha = 0.2, aes(color = Treatment, fill = Treatment, lty = Time)) +
geom_point(aes(x=PC1.mean, y=PC2.mean,color=Treatment, shape = Time), data=pca.centroids, size=4, show.legend=FALSE) +
scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
scale_colour_manual(values=c("#46008B", "#8B0046", "#468B00"), breaks = c("Control_Ambient","Bleached_Hot", "Mortality_Hot"), labels = c("Control", "Bleached", "Partial Mortality")) +
scale_fill_manual(values=c("#46008B", "#8B0046", "#468B00"), breaks = c("Control_Ambient","Bleached_Hot", "Mortality_Hot"), labels = c("Control", "Bleached", "Partial Mortality")) +
scale_shape_manual(values=c(15, 17, 19)) +
theme(legend.text = element_text(size=17),
legend.position=c(0.9,0.8),
plot.background = element_blank(),
legend.title = element_text(size=22),
plot.margin = margin(1, 1, 1, 1, "cm"),
axis.text = element_text(size=27),
title = element_text(size=25, face="bold"),
axis.title = element_text(size=30))
#Add segments
#3. add segments
segpoints<-pca.centroids%>%
gather(variable, value, -(Time:Treatment)) %>%
unite(temp, Time, variable) %>%
spread(temp, value)
PCAfull<-PCAcen +
geom_segment(aes(x = Day0_PC1.mean, y = Day0_PC2.mean, xend = Day37_PC1.mean, yend = Day37_PC2.mean, colour = Treatment), data = segpoints, size=2, show.legend=FALSE) +
geom_segment(aes(x = Day37_PC1.mean, y = Day37_PC2.mean, xend = Day52_PC1.mean, yend = Day52_PC2.mean, colour = Treatment), data = segpoints, size=2, arrow = arrow(length=unit(0.5,"cm")), show.legend=FALSE); PCAfull
ggsave(filename="../output/Metabolomics/FullPCA_metabolomics.pdf", plot=PCAfull, dpi=300, width=10, height=10, units="in")
#Control vs Bleached
norm.data_2 <- column_to_rownames(norm.data, 'Sample.ID')
D52_CvsB <- norm.data_2 %>%
filter(Time == "Day52") %>%
filter(Treatment != "Mortality_Hot")
D52_CvsB_clean <- na.omit(D52_CvsB)
#assigning datasets
X <- D52_CvsB[4:183]
Y <- as.factor(D52_CvsB$Treatment)
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples
#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda) # 2 Plot the samples
#plotVar(MyResult.plsda, cutoff = 0.7)
plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Control")
MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value
#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)
# component validation
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50
plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")
#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)
## $Comp1
## AUC p-value
## Bleached_Hot vs Control_Ambient 1 0.009023
##
## $Comp2
## AUC p-value
## Bleached_Hot vs Control_Ambient 1 0.009023
##
## $Comp3
## AUC p-value
## Bleached_Hot vs Control_Ambient 1 0.009023
##
## $Comp4
## AUC p-value
## Bleached_Hot vs Control_Ambient 1 0.009023
##
## $Comp5
## AUC p-value
## Bleached_Hot vs Control_Ambient 1 0.009023
##
## $Comp6
## AUC p-value
## Bleached_Hot vs Control_Ambient 1 0.009023
#VIP Extraction
D52_CvsB_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_CvsB_VIP_DF <- as.data.frame(D52_CvsB_VIP[["tab"]])
D52_CvsB_VIP_DF
## VIP
## N-acetyl-glutamine 1.8475464
## Ornithine 1.8471462
## Arginine 1.8447445
## 4-Guanidinobutanoic acid 1.8313272
## Nicotinamide riboside 1.8236378
## NADPH (SIM) 1.8188263
## Homoarginine 1.7686490
## o-acetyl-L-serine 1.7272228
## Acetyl proline 1.7261229
## Homocitrulline 1.7116100
## Itaconic acid 1.6527043
## NG-dimethyl-L-arginine 1.6502903
## Ribose phosphate 1.6390388
## Uric acid 1.5753606
## Lysine 1.5659648
## Hypoxanthine 1.5546614
## 2-Aminoethylphosphonate 1.5537645
## Glycine 1.5474496
## Arginine-Valine 1.5256292
## Histidine 1.5202824
## Acetyllysine 1.5116372
## Glutamine 1.5116076
## Asparagine 1.4973637
## O-Phosphorylethanolamine 1.4952363
## Dihydroorotate 1.4941539
## Valine 1.4804471
## Cytosine 1.4471037
## Carnosine 1.4413172
## Pyrroline-5-carboxylic acid 1.4120153
## O-Butanoylcarnitine 1.3923286
## Aminoadipic acid 1.3922921
## Alanine 1.3771535
## 5-Methoxytryptamine 1.3499814
## Stearic acid 1.3353469
## Phosphocholine 1.3054181
## Nicotinate 1.2877107
## Betaine 1.2875509
## Isoleucine 1.2811918
## Glucuronic acid 1.2266900
## N-acetyl-glutamate 1.2144519
## Phenylpropanolamine 1.2115881
## Mandelic acid 1.2103823
## Pyroglutamate 1.2016750
## Deoxycytidine 1.1982576
## L-Palmitoylcarnitine 1.1980581
## Citramalic acid 1.1775553
## N-acetyl-L-ornithine 1.1149622
## Uridine 1.1120195
## Phosphocholine (+HAc) 1.0945526
## Leucic acid 1.0908325
## Glycerate 1.0874723
## PosIS_Lysine-2H8 1.0793108
## Glutathione disulfide 1.0486646
## 5-Methylcytosine 1.0439816
## PosIS_Inosine-15N4 1.0365273
## Aspartate 1.0322076
## NegIS_Thymine-2H4 1.0310881
## UDP-N-acetyl-glucosamine 1.0212216
## PosIS_Glutamine-2H5-15N2 1.0104041
## UMP 1.0069796
## NegIS_Glucose-2H7-13C6 1.0041126
## Sucrose 0.9917345
## Lactate 0.9905264
## Methionine sulfoxide 0.9896006
## Histamine 0.9895645
## Mannose-6-phosphate 0.9881803
## Hypotaurine 0.9838006
## Adenine 0.9764535
## Hydroxyproline 0.9740385
## Phenylalanine 0.9739488
## Glycerophosphocholine (+HAc) 0.9661476
## Deoxyguanosine 0.9542160
## Cystathionine 0.9532942
## CDP-Choline 0.9495000
## PosIS_Alanine-2H4-15N 0.9461741
## N-Acetyl-L-alanine 0.9449103
## Sorbitol 0.9343901
## Arginine-Alanine 0.9221628
## D-2-Aminobutyric acid 0.9156346
## Fructose 0.9101266
## Glycerol-3-phosphate 0.9092518
## Acetyl-arginine 0.9050694
## Spermine 0.8955273
## PosIS_Serine-13C3-15N1 0.8938346
## NegIS_Glycine-13C2_15N1 0.8877996
## a-ketoglutarate 0.8818213
## Indole 0.8716574
## Glucose-6-phosphate 0.8711653
## Malate 0.8690751
## Glutathione 0.8610566
## Succinate 0.8597732
## O-Propanoylcarnitine 0.8574560
## Carbamoyl phosphate 0.8367812
## Dihydroxybenzeneacetic acid 0.8330689
## Homocysteine 0.8289893
## Aconitate 0.8157140
## Leucine 0.8091813
## Guanosine 0.8030839
## Serine 0.7987984
## Glutamate 0.7968480
## NADP+ (SIM) 0.7768044
## Methionine 0.7650715
## Hydroxyphenyl pyruvate 0.7601818
## Methylphenyllactate 0.7571953
## Adenosine 0.7544527
## Tyrosine 0.7396607
## Purine 0.7352270
## UDP-D-Glucose 0.7326338
## Tryptophan 0.7324533
## Ribitol 0.7316265
## Raffinose 0.7276736
## Dihydroxyacetone phosphate 0.7271345
## NegIS_Lysine-2H8 0.7235685
## Hippuric acid 0.7192444
## 1 3-Dimethyluric acid 0.7148183
## Threonine 0.7142418
## NegIS_Inosine-15N4 0.7138902
## Arginine-Glutamine 0.7105654
## 5- Methylthioadenosine 0.7020726
## NegIS_Glutamine-2H5-15N2 0.7013269
## Glycerophosphocholine 0.6996244
## Fructose-6-phosphate 0.6965501
## Xanthine 0.6933256
## ADP-D-Glucose 0.6915677
## 5-Hydroxytryptophan 0.6899659
## Glutaryl-carnitine 0.6840990
## N N N-Trimethyllysine 0.6822649
## Citrulline 0.6768827
## Cellobiose 0.6763358
## Carnitine 0.6667547
## Acetyl glycine 0.6642562
## Guanine 0.6513430
## Pyruvate 0.6501816
## Citrate 0.6495698
## CDP-Choline (+HAc) 0.6378132
## Cytidine 0.6333613
## Uracil 0.6319751
## NegIS_Serine-13C3-15N1 0.6252614
## Cystine 0.6232665
## NADH (SIM) 0.6171055
## CMP 0.6050429
## Tryptamine 0.6003752
## NegIS_Alanine-2H4-15N 0.5994133
## Levulinic Acid 0.5938777
## Inositol 0.5833383
## O-Acylcarnitine 0.5434109
## Ascorbic acid 0.5392257
## Hydroxyoctanoic acid 0.5341602
## Tyramine 0.5141836
## Thymidine 0.5103889
## Lysine-Glutamine 0.5080734
## GMP 0.4954510
## NegIS_Malate-2H3 0.4914272
## NAD+ (SIM) 0.4879187
## Threonic acid 0.4846377
## Creatine 0.4772946
## AMP 0.4387995
## ADP 0.4384280
## 1-Methylhistidine 0.4363119
## 3-Phenylbutyric acid 0.4277624
## Inosine 0.4138897
## O-Decanoyl-L-carnitine 0.4122824
## Trigonelline 0.4057788
## Isocitrate 0.3969700
## S-adenosyl-L-methionine 0.3743649
## 5-Phenylvaleric acid 0.3339220
## Pantothenate 0.3294431
## Tridecanoic acid 0.3086940
## gamma-amino butyrate 0.3030969
## Glucose 0.2855454
## Taurine 0.2816848
## Creatinine 0.2743437
## Thiamine 0.2639383
## Proline 0.2521395
## Methyloxovaleric acid (Ketoleucine) 0.2401009
## Cholesteryl sulfate 0.2177561
## NAD+ 0.1902808
## L-Octanoylcarnitine 0.1842706
## 11a-Hydroxyprogesterone 0.1540240
## Androstenedione 0.0000000
D52_CvsP <- norm.data_2 %>%
filter(Time == "Day52") %>%
filter(Treatment != "Bleached_Hot")
D52_CvsP_clean <- na.omit(D52_CvsP)
#assigning datasets
X <- D52_CvsP[4:183]
Y <- as.factor(D52_CvsP$Treatment)
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples
#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda) # 2 Plot the samples
#plotVar(MyResult.plsda, cutoff = 0.7)
plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Control")
MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value
#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)
# component validation
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50
plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")
#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)
## $Comp1
## AUC p-value
## Control_Ambient vs Mortality_Hot 1 0.009023
##
## $Comp2
## AUC p-value
## Control_Ambient vs Mortality_Hot 1 0.009023
##
## $Comp3
## AUC p-value
## Control_Ambient vs Mortality_Hot 1 0.009023
##
## $Comp4
## AUC p-value
## Control_Ambient vs Mortality_Hot 1 0.009023
##
## $Comp5
## AUC p-value
## Control_Ambient vs Mortality_Hot 1 0.009023
##
## $Comp6
## AUC p-value
## Control_Ambient vs Mortality_Hot 1 0.009023
#VIP Extraction
D52_CvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_CvsP_VIP_DF <- as.data.frame(D52_CvsP_VIP[["tab"]])
D52_CvsP_VIP_DF
## VIP
## Glutamine 1.95183295
## Asparagine 1.92307540
## Alanine 1.87884065
## Dihydroorotate 1.87651653
## Ornithine 1.87189680
## N-acetyl-glutamine 1.85536266
## NADPH (SIM) 1.85321221
## Arginine 1.83891009
## Homoarginine 1.83299432
## 2-Aminoethylphosphonate 1.83223482
## Glycine 1.82048680
## 4-Guanidinobutanoic acid 1.81468206
## Pyrroline-5-carboxylic acid 1.79602119
## NG-dimethyl-L-arginine 1.77876897
## o-acetyl-L-serine 1.74075672
## UMP 1.68516324
## Acetyl proline 1.65987799
## Isoleucine 1.64581615
## O-Phosphorylethanolamine 1.63963923
## Homocysteine 1.63509267
## Cellobiose 1.62905929
## Histidine 1.60639451
## Lysine 1.59132135
## Sucrose 1.58575238
## Nicotinamide riboside 1.55778749
## Aminoadipic acid 1.53723685
## Uric acid 1.52895394
## Threonine 1.51605093
## Malate 1.51217197
## Phosphocholine 1.46409593
## Citrate 1.46139623
## Inosine 1.45563425
## Hydroxyproline 1.45342368
## Adenine 1.44939478
## Raffinose 1.43234915
## Carnosine 1.41147563
## Betaine 1.39147821
## O-Butanoylcarnitine 1.38769651
## Fructose 1.33808606
## Phosphocholine (+HAc) 1.28591820
## Glucose 1.28499677
## Isocitrate 1.27020929
## Proline 1.25794777
## Cytidine 1.21106737
## Homocitrulline 1.20177569
## Acetyl-arginine 1.20107779
## N-acetyl-L-ornithine 1.19666001
## Uridine 1.18533810
## Tryptophan 1.17126818
## Cytosine 1.15921045
## Arginine-Valine 1.14747517
## Glutamate 1.12687089
## Valine 1.12163203
## Cholesteryl sulfate 1.12004264
## Glutathione disulfide 1.11067175
## Citramalic acid 1.10601174
## Ribose phosphate 1.09233844
## Phenylpropanolamine 1.06538435
## Guanine 1.03581087
## NegIS_Thymine-2H4 1.01472122
## Leucine 1.01267682
## Glucose-6-phosphate 1.00433520
## Glucuronic acid 0.96968000
## Methylphenyllactate 0.96143956
## Methionine sulfoxide 0.95133404
## L-Octanoylcarnitine 0.94847611
## PosIS_Lysine-2H8 0.94657143
## Citrulline 0.94430892
## Purine 0.94203603
## NADP+ (SIM) 0.94184557
## Aconitate 0.93310342
## Leucic acid 0.92692049
## 3-Phenylbutyric acid 0.90413651
## gamma-amino butyrate 0.88816741
## D-2-Aminobutyric acid 0.87772485
## Cystine 0.87558238
## Succinate 0.87147178
## UDP-N-acetyl-glucosamine 0.87017478
## Stearic acid 0.86617973
## Pyruvate 0.86364769
## Inositol 0.85652167
## Glycerol-3-phosphate 0.84484992
## Aspartate 0.84408829
## Hypotaurine 0.84025624
## CMP 0.83502291
## NADH (SIM) 0.82668099
## NegIS_Serine-13C3-15N1 0.82402052
## N N N-Trimethyllysine 0.79975425
## Arginine-Glutamine 0.79583599
## Deoxycytidine 0.78823107
## Glycerate 0.78497521
## GMP 0.77161310
## Pyroglutamate 0.76390084
## Thymidine 0.75105288
## Trigonelline 0.74956969
## Lysine-Glutamine 0.74831304
## AMP 0.73590124
## Cystathionine 0.72966252
## Serine 0.72653631
## PosIS_Inosine-15N4 0.72413524
## Indole 0.72140592
## Glutaryl-carnitine 0.71072692
## Androstenedione 0.69970435
## NegIS_Glycine-13C2_15N1 0.69649511
## Arginine-Alanine 0.68746153
## Mannose-6-phosphate 0.68064189
## Itaconic acid 0.66831077
## O-Decanoyl-L-carnitine 0.63358224
## Histamine 0.62613960
## Hippuric acid 0.62455488
## Glutathione 0.61255589
## PosIS_Serine-13C3-15N1 0.61206081
## NegIS_Lysine-2H8 0.60709076
## NegIS_Glucose-2H7-13C6 0.59617948
## 5- Methylthioadenosine 0.59133989
## Tyramine 0.58033535
## Fructose-6-phosphate 0.57882260
## CDP-Choline 0.56316116
## Phenylalanine 0.54839701
## PosIS_Glutamine-2H5-15N2 0.54591353
## 5-Hydroxytryptophan 0.54101181
## NegIS_Inosine-15N4 0.53977976
## Methionine 0.52388330
## Ascorbic acid 0.50583181
## L-Palmitoylcarnitine 0.49659592
## Acetyl glycine 0.49200437
## NegIS_Glutamine-2H5-15N2 0.47921951
## 11a-Hydroxyprogesterone 0.47652810
## NAD+ (SIM) 0.46136351
## Levulinic Acid 0.45898971
## Taurine 0.45507119
## Hydroxyoctanoic acid 0.44963397
## Sorbitol 0.42287919
## N-Acetyl-L-alanine 0.41186675
## O-Propanoylcarnitine 0.41046357
## Dihydroxybenzeneacetic acid 0.41035866
## Creatinine 0.40434257
## NegIS_Malate-2H3 0.40325888
## Guanosine 0.39786977
## Lactate 0.39385549
## Hypoxanthine 0.38550512
## S-adenosyl-L-methionine 0.37936958
## 1-Methylhistidine 0.37019858
## Uracil 0.36766055
## Adenosine 0.36265274
## N-acetyl-glutamate 0.34685003
## Xanthine 0.33962026
## Spermine 0.33394092
## UDP-D-Glucose 0.32811615
## NegIS_Alanine-2H4-15N 0.32292368
## PosIS_Alanine-2H4-15N 0.31825653
## Hydroxyphenyl pyruvate 0.31578207
## Tyrosine 0.31039143
## Creatine 0.30861250
## Acetyllysine 0.30800895
## 5-Methylcytosine 0.30724032
## NAD+ 0.30428741
## 5-Phenylvaleric acid 0.29983757
## Carnitine 0.27933570
## Tridecanoic acid 0.27651233
## Tryptamine 0.26354981
## 1 3-Dimethyluric acid 0.25801469
## Deoxyguanosine 0.24832297
## 5-Methoxytryptamine 0.24828789
## ADP-D-Glucose 0.20810149
## Nicotinate 0.19656018
## Glycerophosphocholine 0.19466556
## Pantothenate 0.18802013
## ADP 0.18575476
## Dihydroxyacetone phosphate 0.17509571
## Carbamoyl phosphate 0.16183222
## Methyloxovaleric acid (Ketoleucine) 0.15785538
## O-Acylcarnitine 0.13936833
## Ribitol 0.12097866
## Glycerophosphocholine (+HAc) 0.11271632
## CDP-Choline (+HAc) 0.11117794
## Thiamine 0.11116440
## Threonic acid 0.10534756
## Mandelic acid 0.10458903
## a-ketoglutarate 0.03554707
D52_BvsP <- norm.data_2 %>%
filter(Time == "Day52") %>%
filter(Treatment != "Control_Ambient")
D52_BvsP_clean <- na.omit(D52_BvsP)
#assigning datasets
X <- D52_BvsP[4:183]
Y <- as.factor(D52_BvsP$Treatment)
#summary(Y) ## class summary
#summary(X)
#dim(X) ## number of samples and features
#length(Y) ## length of class membership factor = number of samples
#PLSDA without variable selection
MyResult.plsda <- plsda(X, Y, ncomp = 2) # 1 Run the method
#plotIndiv(MyResult.plsda) # 2 Plot the samples
#plotVar(MyResult.plsda, cutoff = 0.7)
plotIndiv(MyResult.plsda, ind.names = FALSE, legend=TRUE,ellipse = TRUE, title="Day 52 - Bleached vs Partial Mortality")
MyResult.plsda2 <- plsda(X,Y, ncomp=6) #number of components is #classes-1
#selectVar(MyResult.plsda2, comp=1)$value
#plotLoadings(MyResult.plsda2, comp = 1, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
#plotLoadings(MyResult.plsda2, comp = 2, contrib = 'max', method = 'median', ndisplay = 50) #top 50 metabolites contributing to variation on component 1
comp1.select.metabolites.all <- data.frame(selectVar(MyResult.plsda2, comp = 1)$value)
# component validation
set.seed(200) # for reproducbility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 5, #figure out why only folds =5 works
progressBar = FALSE, auc = TRUE, nrepeat = 10) # we suggest nrepeat = 50
plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")
#ROC Curve
auc.plsda = auroc(MyResult.plsda2, roc.comp = 2)
## $Comp1
## AUC p-value
## Bleached_Hot vs Mortality_Hot 1 0.009023
##
## $Comp2
## AUC p-value
## Bleached_Hot vs Mortality_Hot 1 0.009023
##
## $Comp3
## AUC p-value
## Bleached_Hot vs Mortality_Hot 1 0.009023
##
## $Comp4
## AUC p-value
## Bleached_Hot vs Mortality_Hot 1 0.009023
##
## $Comp5
## AUC p-value
## Bleached_Hot vs Mortality_Hot 1 0.009023
##
## $Comp6
## AUC p-value
## Bleached_Hot vs Mortality_Hot 1 0.009023
#VIP Extraction
D52_BvsP_VIP <- PLSDA.VIP(MyResult.plsda2)
D52_BvsP_VIP_DF <- as.data.frame(D52_BvsP_VIP[["tab"]])
D52_BvsP_VIP_DF
## VIP
## Asparagine 2.3403317
## N-acetyl-glutamate 2.2812787
## Stearic acid 2.1681666
## Sucrose 2.1212151
## Itaconic acid 2.0733473
## Glutaryl-carnitine 1.9873658
## Homocysteine 1.9049851
## Inosine 1.8635605
## Guanosine 1.8447103
## Aconitate 1.7704511
## Acetyllysine 1.7544757
## Cellobiose 1.7530563
## Glucose 1.7031338
## Dihydroorotate 1.6957906
## O-Propanoylcarnitine 1.6934506
## Malate 1.6695875
## Isocitrate 1.6628276
## Proline 1.6479034
## Glycerate 1.6355353
## L-Palmitoylcarnitine 1.6158436
## Threonine 1.6026816
## Thymidine 1.5252766
## Spermine 1.5164722
## 5-Methylcytosine 1.5032389
## Glycerophosphocholine (+HAc) 1.4996405
## Mandelic acid 1.4993117
## Cholesteryl sulfate 1.4804596
## Cytidine 1.4662498
## UDP-D-Glucose 1.4614837
## Hydroxyproline 1.4573431
## Raffinose 1.4304646
## Homoarginine 1.4259474
## Hypoxanthine 1.4115503
## L-Octanoylcarnitine 1.4076587
## Sorbitol 1.3965138
## Acetyl-arginine 1.3949781
## Nicotinate 1.3922016
## N-acetyl-glutamine 1.3801435
## GMP 1.3371424
## Trigonelline 1.3060575
## Pyrroline-5-carboxylic acid 1.3034550
## 3-Phenylbutyric acid 1.2917389
## 5-Methoxytryptamine 1.2818365
## Mannose-6-phosphate 1.2550524
## Carnitine 1.2071169
## Nicotinamide riboside 1.2001203
## Purine 1.1998143
## Adenine 1.1881913
## Tryptophan 1.1665866
## Pyroglutamate 1.1528613
## NADPH (SIM) 1.1516736
## Adenosine 1.1150886
## Guanine 1.1002273
## Carbamoyl phosphate 1.0687537
## Cystine 1.0677126
## NAD+ (SIM) 1.0516237
## Ribose phosphate 1.0435433
## Glycerophosphocholine 1.0368153
## Uric acid 1.0299369
## Methylphenyllactate 1.0271903
## Fructose 1.0173638
## Hydroxyphenyl pyruvate 1.0148936
## Glycine 0.9896683
## Histamine 0.9811026
## 1 3-Dimethyluric acid 0.9778179
## Androstenedione 0.9533351
## o-acetyl-L-serine 0.9452371
## Dihydroxyacetone phosphate 0.9434353
## S-adenosyl-L-methionine 0.8953292
## Glutamate 0.8855392
## Lactate 0.8720782
## Tyramine 0.8687068
## Inositol 0.8599795
## AMP 0.8540432
## a-ketoglutarate 0.8485973
## Uracil 0.8481745
## Valine 0.8418153
## Betaine 0.8205602
## 11a-Hydroxyprogesterone 0.8131395
## Arginine 0.8113272
## NegIS_Glucose-2H7-13C6 0.7961449
## Pyruvate 0.7842410
## Ribitol 0.7811872
## Methyloxovaleric acid (Ketoleucine) 0.7729055
## Citrulline 0.7495753
## NegIS_Serine-13C3-15N1 0.7395845
## Fructose-6-phosphate 0.7351441
## Lysine 0.7242599
## ADP-D-Glucose 0.7221048
## Homocitrulline 0.7028496
## 5- Methylthioadenosine 0.6952357
## NADH (SIM) 0.6931631
## CDP-Choline 0.6840125
## Levulinic Acid 0.6818591
## Ornithine 0.6713528
## Tyrosine 0.6644111
## 2-Aminoethylphosphonate 0.6500598
## NegIS_Thymine-2H4 0.6414871
## Dihydroxybenzeneacetic acid 0.6359765
## Aminoadipic acid 0.6213054
## UDP-N-acetyl-glucosamine 0.6198763
## Xanthine 0.6177301
## Hippuric acid 0.6100370
## Phosphocholine 0.6097056
## O-Decanoyl-L-carnitine 0.5894843
## NegIS_Alanine-2H4-15N 0.5892492
## UMP 0.5846745
## NG-dimethyl-L-arginine 0.5835716
## Glycerol-3-phosphate 0.5808181
## N N N-Trimethyllysine 0.5774171
## Phenylalanine 0.5753140
## N-Acetyl-L-alanine 0.5737060
## Hypotaurine 0.5725657
## O-Acylcarnitine 0.5639802
## Arginine-Alanine 0.5564164
## 4-Guanidinobutanoic acid 0.5509073
## Glucuronic acid 0.5505127
## Cytosine 0.5477215
## Citrate 0.5431613
## Threonic acid 0.5373127
## NegIS_Inosine-15N4 0.5344579
## Acetyl proline 0.5192669
## gamma-amino butyrate 0.5172393
## Phenylpropanolamine 0.5103872
## Phosphocholine (+HAc) 0.4990537
## Ascorbic acid 0.4945664
## Creatine 0.4862920
## 5-Phenylvaleric acid 0.4849747
## Isoleucine 0.4816555
## NegIS_Glutamine-2H5-15N2 0.4770023
## Taurine 0.4711647
## Indole 0.4701378
## NegIS_Glycine-13C2_15N1 0.4598130
## Glutathione 0.4589659
## Acetyl glycine 0.4433496
## Leucine 0.4285686
## CMP 0.4280790
## Uridine 0.4254490
## Glutathione disulfide 0.4250539
## Tryptamine 0.4182956
## Aspartate 0.4137877
## Glucose-6-phosphate 0.4126074
## Deoxyguanosine 0.3999495
## PosIS_Inosine-15N4 0.3987825
## Methionine 0.3985404
## Histidine 0.3981328
## Glutamine 0.3903608
## PosIS_Alanine-2H4-15N 0.3831474
## O-Butanoylcarnitine 0.3766952
## Tridecanoic acid 0.3755690
## Citramalic acid 0.3743930
## Arginine-Glutamine 0.3742309
## NAD+ 0.3723728
## PosIS_Glutamine-2H5-15N2 0.3698439
## PosIS_Serine-13C3-15N1 0.3698128
## Lysine-Glutamine 0.3674037
## O-Phosphorylethanolamine 0.3598021
## Pantothenate 0.3553003
## Serine 0.3483882
## NADP+ (SIM) 0.3275083
## N-acetyl-L-ornithine 0.3227260
## 1-Methylhistidine 0.3096755
## 5-Hydroxytryptophan 0.3075385
## Succinate 0.2951576
## CDP-Choline (+HAc) 0.2906145
## Creatinine 0.2831129
## Alanine 0.2801036
## Methionine sulfoxide 0.2785114
## Hydroxyoctanoic acid 0.2580881
## Thiamine 0.2533475
## Deoxycytidine 0.2348238
## D-2-Aminobutyric acid 0.2333163
## ADP 0.2186992
## PosIS_Lysine-2H8 0.1968858
## NegIS_Lysine-2H8 0.1865058
## Arginine-Valine 0.1658788
## Leucic acid 0.1561995
## NegIS_Malate-2H3 0.1359512
## Carnosine 0.0000000
## Cystathionine 0.0000000
####### Overlaps for VIPs >1 #########
# Converting row names to column
D52_CvsB_VIP_table <- rownames_to_column(D52_CvsB_VIP_DF, var = "Metabolite")
D52_CvsP_VIP_table <- rownames_to_column(D52_CvsP_VIP_DF, var = "Metabolite")
D52_BvsP_VIP_table <- rownames_to_column(D52_BvsP_VIP_DF, var = "Metabolite")
# Filtering for VIP > 1
D52_CvsB_VIP_1 <- D52_CvsB_VIP_table %>%
filter(VIP >= 1)
D52_CvsP_VIP_1 <- D52_CvsP_VIP_table %>%
filter(VIP >= 1)
D52_BvsP_VIP_1 <- D52_BvsP_VIP_table %>%
filter(VIP >= 1)
D52_CvsB_VIP_1 %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_point() +
ylab("Metabolite") +
xlab("VIP Score") +
ggtitle("Control vs Bleached") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
D52_CvsP_VIP_1 %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_point() +
ylab("Metabolite") +
xlab("VIP Score") +
ggtitle("Control vs Partial Mortality") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
D52_BvsP_VIP_1 %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_point() +
ylab("Metabolite") +
xlab("VIP Score") +
ggtitle("Bleached vs Partial Mortality") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
### Compare for Venn Diagram (https://github.com/gaospecial/ggVennDiagram)
#Open this part in Repo
# nrow(D52_CvsB_VIP_1)
# nrow(D52_CvsP_VIP_1)
#
# library("VennDiagram")
#
# venn.x <- list(
# D52_CvsB = sample(D52_CvsB_VIP_1$Metabolite),
# D52_CvsP = sample(D52_CvsP_VIP_1$Metabolite)
# )
#
# myCol <- c("#8B0046", "#468B00")
#
# venn.diagram(venn.x,
# filename = 'output/Metabolomics/D52_BvsP_venn.png',
# output = TRUE,
# height = 480,
# width = 480,
# resolution = 300,
# category.names = c("Bleached", "Partial Mortality"),
# lwd = 2,
# col = c("#8B0046", "#468B00"),
# fill = c(alpha("#8B0046",0.3), alpha("#468B00", 0.3)),
# cex = 0.5,
# fontfamily = "sans",
# fontface = "bold",
# cat.cex = 0.5,
# cat.default.pos = "outer",
# cat.pos = c(12, -12),
# cat.dist = c(-0.45, -0.45),
# cat.fontfamily = "sans",
# cat.fontface = "bold"
# )
# Control vs Bleached
## Gather data frame and group by metabolite
D52_CvB_gather <- D52_CvsB_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
dplyr::select("Treatment", "Metabolite", "Count")
D52_CvB_gather$Treatment <- as.factor(D52_CvB_gather$Treatment)
D52_CvB_gather <- dplyr::group_by(D52_CvB_gather, Metabolite)
# Select metabolites only present from the PLS-DA with VIPs >1
D52_CvB_VIP_Select <- subset(D52_CvB_gather, D52_CvB_gather$Metabolite %in% D52_CvsB_VIP_1$Metabolite)
#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Control
D52_CvB_t.test <-do(D52_CvB_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
alternative = "two.sided",
mu = 0,
paired = FALSE,
var.equal = FALSE,
conf.level = 0.95
)))
#adjust p value for the number of comparisons
D52_CvB_t.test$p.adj<-p.adjust(D52_CvB_t.test$p.value, method=c("fdr"), n=length(D52_CvsB_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method
# Filter for significantly different metabolites (p < 0.05)
D52_CvB_t.test.fdr <- D52_CvB_t.test %>% filter(p.adj <= 0.05)
length(D52_CvB_t.test.fdr$Metabolite) #10
## [1] 10
# # Filter for significantly different metabolites (p < 0.05)
# D52_CvB_t.test.sig <- D52_CvB_t.test %>% filter(p.value <= 0.05)
# length(D52_CvB_t.test.sig$Metabolite) #went from 32 to 10 with p value adjustment
# Filter for metabolites accumulated compared to control
D52_CvB_t.test.sig.up <- D52_CvB_t.test.fdr %>% filter(estimate1 > estimate2)
# Filter for metabolites depleted compared to control
D52_CvB_t.test.sig.down <- D52_CvB_t.test.fdr %>% filter(estimate1 < estimate2)
# Control vs Bleached
## Gather data frame and group by metabolite
D52_CvsP_gather <- D52_CvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
dplyr::select("Treatment", "Metabolite", "Count")
D52_CvsP_gather$Treatment <- as.factor(D52_CvsP_gather$Treatment)
D52_CvsP_gather <- dplyr::group_by(D52_CvsP_gather, Metabolite)
# Select metabolites only present from the PLS-DA with VIPs >1
D52_CvsP_VIP_Select <- subset(D52_CvsP_gather, D52_CvsP_gather$Metabolite %in% D52_CvsP_VIP_1$Metabolite)
#Looped t-test for all metabolites with VIPs >1, estimate 1 = Control_Ambient, estimate 2 = Bleached_Hot
D52_CvsP_t.test <-do(D52_CvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
alternative = "two.sided",
mu = 0,
paired = FALSE,
var.equal = FALSE,
conf.level = 0.95
)))
#adjust p value for the number of comparisons
D52_CvsP_t.test$p.adj<-p.adjust(D52_CvsP_t.test$p.value, method=c("fdr"), n=length(D52_CvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method
# Filter for significantly different metabolites (p < 0.05)
D52_CvsP_t.test.fdr <- D52_CvsP_t.test %>% filter(p.adj <= 0.05)
length(D52_CvsP_t.test.fdr$Metabolite) #34
## [1] 34
# Filter for significantly different metabolites (p < 0.05)
# D52_CvsP_t.test.sig <- D52_CvsP_t.test %>% filter(p.value <= 0.05)
# length(D52_CvsP_t.test.sig$Metabolite) #went from 38 to 34 with p value adjustment
# Filter for metabolites accumulated compared to control
D52_CvsP_t.test.sig.up <- D52_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)
# Filter for metabolites depleted compared to control
D52_CvsP_t.test.sig.down <- D52_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)
# Bleach vs Partial Mortality
## Gather data frame and group by metabolite
D52_BvsP_gather <- D52_BvsP_clean %>% gather(key = "Metabolite", value = "Count", -Fragment.ID, -Time, -Treatment) %>%
dplyr::select("Treatment", "Metabolite", "Count")
D52_BvsP_gather$Treatment <- as.factor(D52_BvsP_gather$Treatment)
D52_BvsP_gather <- dplyr::group_by(D52_BvsP_gather, Metabolite)
# Select metabolites only present from the PLS-DA with VIPs >1
D52_BvsP_VIP_Select <- subset(D52_BvsP_gather, D52_BvsP_gather$Metabolite %in% D52_BvsP_VIP_1$Metabolite)
#Looped t-test for all metabolites with VIPs >1, estimate 1 = Bleached_Hot, estimate 2 = Mortality_Hot
D52_BvsP_t.test <-do(D52_BvsP_VIP_Select, tidy(t.test(.$Count ~ .$Treatment,
alternative = "two.sided",
mu = 0,
paired = FALSE,
var.equal = FALSE,
conf.level = 0.95
)))
#adjust p value for the number of comparisons
D52_BvsP_t.test$p.adj<-p.adjust(D52_BvsP_t.test$p.value, method=c("fdr"), n=length(D52_BvsP_VIP_1$Metabolite)) #length = number of metabolites tested, false discovery rate method
# Filter for significantly different metabolites (p < 0.05)
D52_BvsP_t.test.fdr <- D52_BvsP_t.test %>% filter(p.adj <= 0.05)
length(D52_BvsP_t.test.fdr$Metabolite) #
## [1] 0
# Filter for significantly different metabolites (p < 0.05)
# D52_CvsP_t.test.sig <- D52_CvsP_t.test %>% filter(p.value <= 0.05)
# length(D52_CvsP_t.test.sig$Metabolite) #went from 38 to 34 with p value adjustment
# Filter for metabolites accumulated compared to control
D52_CvsP_t.test.sig.up <- D52_CvsP_t.test.fdr %>% filter(estimate1 < estimate2)
# Filter for metabolites depleted compared to control
D52_CvsP_t.test.sig.down <- D52_CvsP_t.test.fdr %>% filter(estimate1 > estimate2)
# Selecting metabolites that were validated by the t-test for each up and down accumulated
D52_CvsB_VIP_sig_up <- subset(D52_CvsB_VIP_1, D52_CvsB_VIP_1$Metabolite %in% D52_CvB_t.test.sig.up$Metabolite)
D52_CvsB_VIP_sig_down <- subset(D52_CvsB_VIP_1, D52_CvsB_VIP_1$Metabolite %in% D52_CvB_t.test.sig.down$Metabolite)
D52_CvsP_VIP_sig_up <- subset(D52_CvsP_VIP_1, D52_CvsP_VIP_1$Metabolite %in% D52_CvsP_t.test.sig.up$Metabolite)
D52_CvsP_VIP_sig_down <- subset(D52_CvsP_VIP_1, D52_CvsP_VIP_1$Metabolite %in% D52_CvsP_t.test.sig.down$Metabolite)
write.csv(D52_CvsB_VIP_sig_up, "../output/Metabolomics/D52_CvsB_VIP_sig_up.csv")
write.csv(D52_CvsB_VIP_sig_down, "../output/Metabolomics/D52_CvsB_VIP_sig_down.csv")
write.csv(D52_CvsP_VIP_sig_up, "../output/Metabolomics/D52_CvsP_VIP_sig_up.csv")
write.csv(D52_CvsP_VIP_sig_down, "../output/Metabolomics/D52_CvsP_VIP_sig_down.csv")
# Plotting
D52_CvsB_VIP_UP <- D52_CvsB_VIP_sig_up %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point() +
xlim(1.2, 2) +
ylab("Accumulated") +
xlab("") +
ggtitle("Bleached") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
D52_CvsB_VIP_DOWN <- D52_CvsB_VIP_sig_down %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point() +
xlim(1.2, 2) +
ylab("Depleted") +
xlab("VIP Score") +
# ggtitle("Bleached: Depleted") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
D52_CvsP_VIP_UP <- D52_CvsP_VIP_sig_up %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point() +
xlim(1.2, 2) +
ylab("") +
xlab("") +
ggtitle("Partial Mortality") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
D52_CvsP_VIP_DOWN <- D52_CvsP_VIP_sig_down %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point() +
xlim(1.2, 2) +
ylab("") +
xlab("VIP Score") +
# ggtitle("Partial Mortality: Depleted") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"), panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
D52_All_VIP_plot <- plot_grid(D52_CvsB_VIP_UP, D52_CvsP_VIP_UP, D52_CvsB_VIP_DOWN, D52_CvsP_VIP_DOWN,
align = "v",
ncol = 2,
rel_heights = c(0.95, 0.35),
labels = c("A", "B", "C", "D"))
D52_All_VIP_plot
#ggsave(filename="../output/Metabolomics/Day52_VIP.pdf", plot=D52_All_VIP_plot, dpi=300, width=7, height=9, units="in")
# Up regulated metabolites that overlap between B and P
D52_overlap_BvsP_up_1 <- as.data.frame(intersect(D52_CvsB_VIP_sig_up$Metabolite, D52_CvsP_VIP_sig_up$Metabolite))
names(D52_overlap_BvsP_up_1)[1] <- 'Metabolite'
D52_overlap_BvsP_up_2 <- merge(D52_overlap_BvsP_up_1, D52_CvsB_VIP_sig_up, by="Metabolite")
D52_overlap_BvsP_up_VIP <- merge(D52_overlap_BvsP_up_2, D52_CvsP_VIP_sig_up, by="Metabolite")
names(D52_overlap_BvsP_up_VIP)[2] <- 'Bleached'
names(D52_overlap_BvsP_up_VIP)[3] <- 'Partial Mortality'
D52_overlap_BvsP_up_VIP_comb <- gather(D52_overlap_BvsP_up_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')
# Up regulated metabolites that are unique to B
D52_uniqueB_BvsP_up <- as.data.frame(setdiff(D52_CvsB_VIP_sig_up$Metabolite, D52_CvsP_VIP_sig_up$Metabolite))
names(D52_uniqueB_BvsP_up)[1] <- 'Metabolite'
D52_uniqueB_BvsP_up_VIP <- merge(D52_uniqueB_BvsP_up, D52_CvsB_VIP_sig_up, by="Metabolite")
# Up regulated metabolites that are unique to P
D52_uniqueP_BvsP_up <- as.data.frame(setdiff(D52_CvsP_VIP_sig_up$Metabolite, D52_CvsB_VIP_sig_up$Metabolite))
names(D52_uniqueP_BvsP_up)[1] <- 'Metabolite'
D52_uniqueP_BvsP_up_VIP <- merge(D52_uniqueP_BvsP_up, D52_CvsP_VIP_sig_up, by="Metabolite")
# Down regulated metabolites that overlap between B and P
D52_overlap_BvsP_down_1 <- as.data.frame(intersect(D52_CvsB_VIP_sig_down$Metabolite, D52_CvsP_VIP_sig_down$Metabolite))
names(D52_overlap_BvsP_down_1)[1] <- 'Metabolite'
D52_overlap_BvsP_down_2 <- merge(D52_overlap_BvsP_down_1, D52_CvsB_VIP_sig_down, by="Metabolite")
D52_overlap_BvsP_down_VIP <- merge(D52_overlap_BvsP_down_2, D52_CvsP_VIP_sig_down, by="Metabolite")
names(D52_overlap_BvsP_down_VIP)[2] <- 'Bleached'
names(D52_overlap_BvsP_down_VIP)[3] <- 'Partial Mortality'
D52_overlap_BvsP_down_VIP_comb <- gather(D52_overlap_BvsP_down_VIP, key = "Group", value = "VIP", 'Bleached', 'Partial Mortality')
# Down regulated metabolites that are unique to B
# There are none
# Down regulated metabolites that are unique to P
D52_uniqueP_BvsP_down <- as.data.frame(setdiff(D52_CvsP_VIP_sig_down$Metabolite, D52_CvsB_VIP_sig_down$Metabolite))
names(D52_uniqueP_BvsP_down)[1] <- 'Metabolite'
D52_uniqueP_BvsP_down_VIP <- merge(D52_uniqueP_BvsP_down, D52_CvsP_VIP_sig_down, by="Metabolite")
# Plotting
D52_uniqueB_BvsP_up_plot <- D52_uniqueB_BvsP_up_VIP %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point(size = 7, color = "#8B0046") +
xlim(1.2, 2) +
ylab("") +
xlab("VIP Score") +
# ggtitle("Bleached Unique") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size=17, color="black"),
title = element_text(size=17, face="bold"),
axis.title = element_text(size=17))
D52_uniqueP_BvsP_up_plot <- D52_uniqueP_BvsP_up_VIP %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point(size = 7, color = "#468B00") +
xlim(1.2, 2) +
ylab("") +
xlab("") +
# ggtitle("Partial Mortality Unique") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size=17, color="black"),
title = element_text(size=17, face="bold"),
axis.title = element_text(size=17))
D52_uniqueP_BvsP_down_plot <- D52_uniqueP_BvsP_down_VIP %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum))) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point(size = 7, color = "#468B00") +
xlim(1.2, 2) +
ylab("") +
xlab("VIP Score") +
# ggtitle("Partial Mortality: Depleted") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size=17, color="black"),
title = element_text(size=17, face="bold"),
axis.title = element_text(size=17))
D52_overlap_BvsP_up_plot <- D52_overlap_BvsP_up_VIP_comb %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point(size = 7, aes(color = Group))+
scale_colour_manual(values=c("#8B0046", "#468B00")) +
scale_fill_manual(values=c("#8B0046", "#468B00")) +
xlim(1.2, 2) +
ylab("") +
xlab("") +
# ggtitle("Bleached Overlap") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size=17, color="black"),
title = element_text(size=17, face="bold"),
axis.title = element_text(size=17),
legend.position = "none")
D52_overlap_BvsP_down_plot <- D52_overlap_BvsP_down_VIP_comb %>%
arrange(VIP) %>%
ggplot( aes(x = VIP, y = reorder(Metabolite,VIP,sum), fill = Group)) +
geom_hline(aes(yintercept = Metabolite), linetype = "dotted", color = "grey") +
geom_point(size = 7, aes(color = Group))+
scale_colour_manual(values=c("#8B0046", "#468B00")) +
scale_fill_manual(values=c("#8B0046", "#468B00")) +
xlim(1.2, 2) +
ylab("") +
xlab("VIP Score") +
# ggtitle("Bleached Overlap") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size=17, color="black"),
title = element_text(size=17, face="bold"),
axis.title = element_text(size=17),
legend.text = element_text(size = 20),
legend.title = element_text(size = 22))
D52_All_VIP_plot <- D52_uniqueB_BvsP_up_plot +
D52_overlap_BvsP_up_plot +
D52_uniqueP_BvsP_up_plot +
guide_area() +
D52_overlap_BvsP_down_plot +
D52_uniqueP_BvsP_down_plot +
plot_layout(ncol=3, heights=c(0.4,0.1), guides = "collect")
ggsave(filename="../output/Metabolomics/Day52_VIP.png", plot=D52_All_VIP_plot, dpi=300, width=20, height=9, units="in")
Day_52_VIP
library(MetaboAnalystR)
# D52 Control vs Bleached UP Overrepresentation Analysis and via Metaboanalyst 4.0
D52_CvsB_VIP_sig_up
D52_CvsB_VIP_sig_up$Metabolite[D52_CvsB_VIP_sig_up$Metabolite == "NADPH (SIM)"] <- "NADPH"
mSet<-InitDataObjects("conc", "pathora", FALSE)
cmpd.vec<-c(D52_CvsB_VIP_sig_up$Metabolite)
mSet<-Setup.MapData(mSet, cmpd.vec)
mSet<-CrossReferencing(mSet, "name")
mSet<-CreateMappingResultTable(mSet)
mSet<-PerformDetailMatch(mSet, "NG-dimethyl-L-arginine")
mSet<-GetCandidateList(mSet)
mSet<-SetCandidate(mSet, "NG-dimethyl-L-arginine", "Asymmetric dimethylarginine") #Replacing metabolite
mSet<-CreateMappingResultTable(mSet)
mSet<-SetMetabolomeFilter(mSet, F)
D52_CvsB_up_dataset <- mSet$dataSet #extracting input dataframe
D52_CvsB_up_KEGG <- as.data.frame(D52_CvsB_up_dataset$map.table) #extracting KEGG terms
write.csv(D52_CvsB_up_KEGG, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsB_up_KEGG.csv")
## PATHWAY ENRICHMENT
#Model org: C elegans
#ORA analysis: Fisher's exact test
#Pathway topology analysis: relative betweenness centrality (rbc)
mSet_Path<-SetKEGG.PathLib(mSet, "cel", "current") #comparing again C.elegans, review parameters
mSet_Path<-SetMetabolomeFilter(mSet_Path, F)
mSet_Path<-CalculateOraScore(mSet_Path, "rbc", "hyperg") #review parameters
D52_CvsB_up_analset_KEGG <- mSet_Path$api
D52_CvsB_up_PATHWAY <- as.data.frame(D52_CvsB_up_analset_KEGG$ora.results) #extracting KEGG terms
write.csv(D52_CvsB_up_PATHWAY, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsB_up_PATHWAY.csv")
D52_CvsB_up_PATHWAY
# PLOTTING
D52_CvsB_up_PATHWAY <- tibble::rownames_to_column(D52_CvsB_up_PATHWAY, "Pathway")
names(D52_CvsB_up_PATHWAY)[5] <- "pvalue"
names(D52_CvsB_up_PATHWAY)[6] <- "neglogp"
logsig<- -log(0.05)
pointstolabel <- D52_CvsB_up_PATHWAY %>%
filter(pvalue < 0.05)
D52_CvsB_up_PATHWAY_plot <- D52_CvsB_up_PATHWAY %>%
mutate(color = ifelse(neglogp>logsig, "red", "grey")) %>%
ggplot(aes(x=Impact, y=neglogp, size=Impact, color = "black", fill = color)) +
geom_point(shape=21) +
scale_color_identity() +
scale_fill_identity() +
geom_hline(yintercept=logsig, linetype="dashed", color = "red", size=2) +
# geom_text(data=subset(D52_CvsP_up_PATHWAY, neglogp>logsig), aes(Impact,neglogp,label=Pathway)) +
ylab("-log(p)") +
xlab("Pathway Impact") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size=17, color="black"),
title = element_text(size=17, face="bold"),
axis.title = element_text(size=17),
legend.position="none")
D52_CvsB_up_PATHWAY_plot
# D52 Control vs Partial Mortality UP Overrepresentation Analysis and via Metaboanalyst 4.0
D52_CvsP_VIP_sig_up
D52_CvsP_VIP_sig_up$Metabolite[D52_CvsP_VIP_sig_up$Metabolite == "NADPH (SIM)"] <- "NADPH"
mSet<-InitDataObjects("conc", "pathora", FALSE)
cmpd.vec<-c(D52_CvsP_VIP_sig_up$Metabolite)
mSet<-Setup.MapData(mSet, cmpd.vec)
mSet<-CrossReferencing(mSet, "name")
mSet<-CreateMappingResultTable(mSet)
# mSet<-PerformDetailMatch(mSet, "Acetyl Proline")
# mSet<-GetCandidateList(mSet)
mSet<-PerformDetailMatch(mSet, "NG-dimethyl-L-arginine")
mSet<-GetCandidateList(mSet)
mSet<-SetCandidate(mSet, "NG-dimethyl-L-arginine", "Asymmetric dimethylarginine")
mSet<-PerformDetailMatch(mSet, "Pyrroline-5-carboxylic acid")
mSet<-GetCandidateList(mSet)
#mSet<-SetCandidate(mSet, "Pyrroline-5-carboxylic acid", "1-Pyrroline-5-carboxylic acid") #this isnt working for some reson
mSet<-CreateMappingResultTable(mSet)
mSet<-SetMetabolomeFilter(mSet, F)
D52_CvsP_up_dataset <- mSet$dataSet #extracting input dataframe
D52_CvsP_up_KEGG <- as.data.frame(D52_CvsP_up_dataset$map.table) #extracting KEGG terms
write.csv(D52_CvsP_up_KEGG, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsP_up_KEGG.csv")
## PATHWAY ENRICHMENT
#Model org: C elegans
#ORA analysis: Fisher's exact test
#Pathway topology analysis: relative betweenness centrality (rbc)
mSet_Path<-SetKEGG.PathLib(mSet, "cel", "current") #comparing again C.elegans, review parameters
mSet_Path<-SetMetabolomeFilter(mSet_Path, F)
mSet_Path<-CalculateOraScore(mSet_Path, "rbc", "hyperg") #review parameters
D52_CvsP_up_analset_KEGG <- mSet_Path$api
D52_CvsP_up_PATHWAY <- as.data.frame(D52_CvsP_up_analset_KEGG$ora.results) #extracting KEGG terms
write.csv(D52_CvsP_up_PATHWAY, "../output/Metabolomics/MetaboAnalyst_Output/D52_Comparisons/D52_CvsP_up_PATHWAY.csv")
D52_CvsP_up_PATHWAY
# Plotting
D52_CvsP_up_PATHWAY <- tibble::rownames_to_column(D52_CvsP_up_PATHWAY, "Pathway")
names(D52_CvsP_up_PATHWAY)[5] <- "pvalue"
names(D52_CvsP_up_PATHWAY)[6] <- "neglogp"
logsig<- -log(0.05)
pointstolabel <- D52_CvsP_up_PATHWAY %>%
filter(pvalue < 0.05)
D52_CvsP_up_PATHWAY_plot <- D52_CvsP_up_PATHWAY %>%
mutate(color = ifelse(neglogp>logsig, "red", "grey")) %>%
ggplot(aes(x=Impact, y=neglogp, size=Impact, color = "black", fill = color)) +
geom_point(shape=21) +
scale_color_identity() +
scale_fill_identity() +
geom_hline(yintercept=logsig, linetype="dashed", color = "red", size=2) +
# geom_text(data=subset(D52_CvsP_up_PATHWAY, neglogp>logsig), aes(Impact,neglogp,label=Pathway)) +
ylab("-log(p)") +
xlab("Pathway Impact") +
theme_bw() + theme(panel.border = element_rect(linetype = "solid", color = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size=17, color="black"),
title = element_text(size=17, face="bold"),
axis.title = element_text(size=17),
legend.position="none")
D52_CvsP_up_PATHWAY_plot